Alexandra Cathcart

Disaster Relief Project

K-Folds Out of Sampling Performance ( + RF/SVM )

Method KNN(k=5) LDA QDA LR RF(tuning parameter: mtry=3; ntrees=500) SVM(tuning parameter: cost=10; gamma=1; sigma = 8.691262; C = 1)
Accuracy 99.6% 98.3% 99.4% 99.5% 99.7% 99.7%
AUC 99.8% 98.7% 99.8% 99.8% 99.0% 99.4%
ROC in tabs in tabs in tabs in tabs in tabs in tabs
Threshold .66 .85 .40 .29 .50 .52
Sensitivity 93.1% 74.0% 85.1% 91.0% 95.2% 94.0%
Specificity 99.9% 99.2% 99.6% 99.8% 99.8% 99.9%
FDR 2.2% 24.6% 1.7% 4.7% 4.8% 2.8%
Precision 97.8% 75.4% 98.3% 95.3% 95.2% 97.2%

K-Folds Process

Setup
Values will differ from last submission due to new random sampling of the data for train/test sets.
#load required packages
library(tidyverse)
library(dplyr)
library(caret)
library(class)
library(yardstick)
library(plotly)
library(boot)
library(pROC)
library(glmnet)
library(purrr)
library(gridExtra)
library(randomForest)
library(e1071)
#install kernlab only if it is missing; an unconditional install.packages()
#re-downloads on every knit and can fail in non-interactive sessions
if (!requireNamespace("kernlab", quietly = TRUE)) {
  install.packages("kernlab")
}
#reading in the data
data <- read.csv("HaitiPixels.csv", header=TRUE ,sep=",")

#flag blue-tarp pixels as the positive class ("Yes")
data <- data %>%
  mutate(BlueClass = as.factor(ifelse(Class=="Blue Tarp","Yes", "No")))
#check the levels just specified
levels(data$BlueClass)
## [1] "No"  "Yes"
#set data var to be columns 2-5 of the set (Red, Green, Blue, BlueClass)
data <- data[c(2:5)]
#temporary row id so the shuffle can be verified below
data <- data %>% mutate(id = row_number())
#check addition
head(data$id)
## [1] 1 2 3 4 5 6
#fix the RNG state so the shuffle (and every result downstream) is reproducible
set.seed(42)
#shuffle data to fairly split into test / train
shuffleddata <- sample_n(data, nrow(data))
#check that it has been shuffled
head(shuffleddata$id) #different than the first six lines of the csv file
## [1] 19746 61512 18716  7512 42719  8307
#remove the id column
shuffleddata <- shuffleddata[c(1:4)]
#split the data into test and train for use in our upcoming models 
#using a 10k subset for faster knn function execution and file freezing issues
samp <- 1:10000 
samp2 <- 10001:20000
train <- shuffleddata[samp,]
test  <- shuffleddata[samp2,]
head(train)
##   Red Green Blue BlueClass
## 1  77    71   58        No
## 2 216   241  255       Yes
## 3  66    68   54        No
## 4 113   117   86        No
## 5 255   253  179        No
## 6  83    83   68        No
head(test)
##       Red Green Blue BlueClass
## 10001 255   228  186        No
## 10002 169   167  154        No
## 10003 101    97   74        No
## 10004  69    71   52        No
## 10005  77    87   54        No
## 10006 255   255  201        No
KNN
#model training rules for all models:
#10-fold cross-validation; classProbs=TRUE is required for ROC/AUC, and the
#held-out fold predictions are saved so thresholds can be tuned on them later
train_control <- caret::trainControl(method="cv", number=10, returnResamp='all', classProbs=TRUE, savePredictions='final')

#KNN model
#tune k over 1:15; center/scale because KNN is distance-based
system.time({
knnmod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="knn",preProcess = c("center","scale"), tuneGrid = expand.grid(k = c(1:15)))
})
##    user  system elapsed 
##  14.576   0.075  14.705
knnmod
## k-Nearest Neighbors 
## 
## 10000 samples
##     3 predictor
##     2 classes: 'No', 'Yes' 
## 
## Pre-processing: centered (3), scaled (3) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 9000, 9000, 9000, 9000, 9000, 9000, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy  Kappa    
##    1  0.9978    0.9664027
##    2  0.9970    0.9547513
##    3  0.9978    0.9667094
##    4  0.9974    0.9610710
##    5  0.9974    0.9609979
##    6  0.9974    0.9608520
##    7  0.9975    0.9623564
##    8  0.9972    0.9575442
##    9  0.9975    0.9624421
##   10  0.9970    0.9549201
##   11  0.9974    0.9610211
##   12  0.9973    0.9597111
##   13  0.9974    0.9609377
##   14  0.9970    0.9547721
##   15  0.9971    0.9560582
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 3.
#plot KNN model (accuracy vs k)
plot(knnmod)

#set prediction, probability, and cv score variables in case needed
knnmod_pred <- predict(knnmod, test,'raw')
knnmod_prob <- predict(knnmod, test,'prob')

knnmod_scored <- cbind(test, knnmod_pred, knnmod_prob)
#AUC/ROC
#NOTE: this global option is deprecated (see the warning below); the supported
#replacement is the event_level argument, now passed explicitly in each call
options(yardstick.event_first=FALSE)

#area under the curve ("Yes" is the second factor level, i.e. the event)
knn_auc = knnmod_prob %>%
  yardstick::roc_auc(truth=test$BlueClass, Yes, event_level = "second")
## Warning: The `yardstick.event_first` option has been deprecated as of yardstick 0.0.7 and will be completely ignored in a future version.
## Instead, set the following argument directly in the metric function:
## `options(yardstick.event_first = TRUE)`  -> `event_level = 'first'` (the default)
## `options(yardstick.event_first = FALSE)` -> `event_level = 'second'`
## This warning is displayed once per session.
knn_auc
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.994
#ROC curve + plot
ROC_curve<-knnmod_prob %>%
  yardstick::roc_curve(truth=test$BlueClass,estimate=Yes, event_level = "second") %>%
  dplyr::mutate(one_minus_specificity = 1-specificity)

ROC_curve_plot <- ROC_curve %>%
  ggplot(aes(x=one_minus_specificity,y=sensitivity))+
  geom_line() + geom_point() +
  geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
  xlab("one_minus_specificity\n(false positive rate)")+
  ggtitle('KNN ROC curve')
ggplotly(ROC_curve_plot)
#set threshold on the cross-validated (held-out fold) predictions
knnmod_pred2 <- knnmod$pred %>% 
  #the accuracy doesn't improve by reducing the threshold any further than .66, 99.7% best. 
  mutate(prediction = ifelse(Yes>.66, 'Yes', 'No')) %>%
  mutate(prediction = factor(prediction, levels=c('No','Yes')))

#confusion matrix
confusionMatrix(knnmod_pred2$prediction, knnmod_pred2$obs, positive="Yes")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  9646    8
##        Yes   14  332
##                                           
##                Accuracy : 0.9978          
##                  95% CI : (0.9967, 0.9986)
##     No Information Rate : 0.966           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9668          
##                                           
##  Mcnemar's Test P-Value : 0.2864          
##                                           
##             Sensitivity : 0.9765          
##             Specificity : 0.9986          
##          Pos Pred Value : 0.9595          
##          Neg Pred Value : 0.9992          
##              Prevalence : 0.0340          
##          Detection Rate : 0.0332          
##    Detection Prevalence : 0.0346          
##       Balanced Accuracy : 0.9875          
##                                           
##        'Positive' Class : Yes             
## 
LDA
#LDA model
#note: the family="binomial" argument was removed — MASS::lda() has no family
#parameter (it was being silently swallowed through caret's ...)
system.time({
ldamod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="lda",preProcess = c("center","scale"))
})
##    user  system elapsed 
##   0.722   0.006   0.734
ldamod
## Linear Discriminant Analysis 
## 
## 10000 samples
##     3 predictor
##     2 classes: 'No', 'Yes' 
## 
## Pre-processing: centered (3), scaled (3) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 9000, 9000, 9000, 9000, 9000, 9000, ... 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.9831    0.7540222
#set prediction, probability, and cv score variables in case needed
ldamod_pred <- predict(ldamod, test,'raw')
ldamod_prob <- predict(ldamod, test,'prob')

ldamod_scored <- cbind(test, ldamod_pred, ldamod_prob)
#AUC/ROC
#deprecated option: makes "Yes" (second factor level) the event class
options(yardstick.event_first=FALSE)

#area under the curve
lda_auc = ldamod_prob %>%
  yardstick::roc_auc(truth=test$BlueClass, Yes)
lda_auc
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.989
#ROC curve + plot
ROC_curve2<-ldamod_prob %>%
  yardstick::roc_curve(truth=test$BlueClass,estimate=Yes) %>%
  dplyr::mutate(one_minus_specificity = 1-specificity)

ROC_curve_plot2 <- ROC_curve2 %>%
  ggplot(aes(x=one_minus_specificity,y=sensitivity))+
  geom_line() + geom_point() +
  geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
  xlab("one_minus_specificity\n(false positive rate)")+
  ggtitle('LDA ROC curve')
ggplotly(ROC_curve_plot2)
#set new threshold on the cross-validated (held-out fold) predictions
ldamod_pred2 <- ldamod$pred %>% 
  #the accuracy doesn't improve by reducing the threshold any further than .85, 98.6% best.
  mutate(prediction = ifelse(Yes>.85, 'Yes', 'No')) %>%
  mutate(prediction = factor(prediction, levels=c('No','Yes')))

#new threshold matrix
confusionMatrix(ldamod_pred2$prediction, ldamod_pred2$obs, positive="Yes")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  9587   79
##        Yes   73  261
##                                           
##                Accuracy : 0.9848          
##                  95% CI : (0.9822, 0.9871)
##     No Information Rate : 0.966           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.7666          
##                                           
##  Mcnemar's Test P-Value : 0.6851          
##                                           
##             Sensitivity : 0.7676          
##             Specificity : 0.9924          
##          Pos Pred Value : 0.7814          
##          Neg Pred Value : 0.9918          
##              Prevalence : 0.0340          
##          Detection Rate : 0.0261          
##    Detection Prevalence : 0.0334          
##       Balanced Accuracy : 0.8800          
##                                           
##        'Positive' Class : Yes             
## 
QDA
#QDA model
#note: the family="binomial" argument was removed — MASS::qda() has no family
#parameter (it was being silently swallowed through caret's ...)
system.time({
qdamod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="qda",preProcess = c("center","scale"))
})
##    user  system elapsed 
##   0.707   0.007   0.718
qdamod
## Quadratic Discriminant Analysis 
## 
## 10000 samples
##     3 predictor
##     2 classes: 'No', 'Yes' 
## 
## Pre-processing: centered (3), scaled (3) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 9000, 9000, 9000, 9000, 9000, 9000, ... 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.9946    0.9101972
#set prediction, probability, and cv score variables in case needed
qdamod_pred <- predict(qdamod, test,'raw')
qdamod_prob <- predict(qdamod, test,'prob')

qdamod_scored <- cbind(test, qdamod_pred, qdamod_prob)
#AUC/ROC
#deprecated option: makes "Yes" (second factor level) the event class
options(yardstick.event_first=FALSE)

#area under the curve
qda_auc = qdamod_prob %>%
  yardstick::roc_auc(truth=test$BlueClass, Yes)
qda_auc
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.999
#ROC curve + plot
ROC_curve3<-qdamod_prob %>%
  yardstick::roc_curve(truth=test$BlueClass,estimate=Yes) %>%
  dplyr::mutate(one_minus_specificity = 1-specificity)

ROC_curve_plot3 <- ROC_curve3 %>%
  ggplot(aes(x=one_minus_specificity,y=sensitivity))+
  geom_line() + geom_point() +
  geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
  xlab("one_minus_specificity\n(false positive rate)")+
  #fixed copy-paste bug: this is the QDA curve, not LDA
  ggtitle('QDA ROC curve')
ggplotly(ROC_curve_plot3)
#set new threshold on the cross-validated (held-out fold) predictions
qdamod_pred2 <- qdamod$pred %>% 
  #the accuracy doesn't improve by reducing the threshold any further than .40, 99.5% best.
  mutate(prediction = ifelse(Yes>.40, 'Yes', 'No')) %>%
  mutate(prediction = factor(prediction, levels=c('No','Yes')))

#new threshold matrix
confusionMatrix(qdamod_pred2$prediction, qdamod_pred2$obs, positive="Yes")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  9658   52
##        Yes    2  288
##                                          
##                Accuracy : 0.9946         
##                  95% CI : (0.993, 0.9959)
##     No Information Rate : 0.966          
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9115         
##                                          
##  Mcnemar's Test P-Value : 2.592e-11      
##                                          
##             Sensitivity : 0.8471         
##             Specificity : 0.9998         
##          Pos Pred Value : 0.9931         
##          Neg Pred Value : 0.9946         
##              Prevalence : 0.0340         
##          Detection Rate : 0.0288         
##    Detection Prevalence : 0.0290         
##       Balanced Accuracy : 0.9234         
##                                          
##        'Positive' Class : Yes            
## 
LR
#GLM model — logistic regression; family="binomial" is appropriate here
#(unlike the lda/qda chunks, glm actually accepts a family argument)
system.time({
glmmod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="glm",preProcess = c("center","scale"), family="binomial")
})
##    user  system elapsed 
##   1.264   0.032   1.303
glmmod
## Generalized Linear Model 
## 
## 10000 samples
##     3 predictor
##     2 classes: 'No', 'Yes' 
## 
## Pre-processing: centered (3), scaled (3) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 9000, 9000, 9000, 9000, 9000, 9000, ... 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.9956    0.9298505
#set prediction, probability, and cv score variables in case needed
glmmod_pred <- predict(glmmod, test,'raw')
glmmod_prob <- predict(glmmod, test,'prob')

glmmod_scored <- cbind(test, glmmod_pred, glmmod_prob)
#AUC/ROC
#deprecated option: makes "Yes" (second factor level) the event class
options(yardstick.event_first=FALSE)

#area under the curve
#fixed copy-paste bug: this was stored in qda_auc, clobbering the real QDA AUC
glm_auc = glmmod_prob %>%
  yardstick::roc_auc(truth=test$BlueClass, Yes)
glm_auc
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.998
#ROC curve + plot
ROC_curve4<-glmmod_prob %>%
  yardstick::roc_curve(truth=test$BlueClass,estimate=Yes) %>%
  dplyr::mutate(one_minus_specificity = 1-specificity)

ROC_curve_plot4 <- ROC_curve4 %>%
  ggplot(aes(x=one_minus_specificity,y=sensitivity))+
  geom_line() + geom_point() +
  geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
  xlab("one_minus_specificity\n(false positive rate)")+
  #fixed copy-paste bug: this is the logistic regression curve, not LDA
  ggtitle('Logistic Regression ROC curve')
ggplotly(ROC_curve_plot4)
#set new threshold on the cross-validated (held-out fold) predictions
glmmod_pred2 <- glmmod$pred %>% 
  #the accuracy doesn't improve by reducing the threshold any further than .29, 99.6% best.
  mutate(prediction = ifelse(Yes>.29, 'Yes', 'No')) %>%
  mutate(prediction = factor(prediction, levels=c('No','Yes')))

#new threshold matrix
confusionMatrix(glmmod_pred2$prediction, glmmod_pred2$obs, positive="Yes")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  9646   30
##        Yes   14  310
##                                           
##                Accuracy : 0.9956          
##                  95% CI : (0.9941, 0.9968)
##     No Information Rate : 0.966           
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.9315          
##                                           
##  Mcnemar's Test P-Value : 0.02374         
##                                           
##             Sensitivity : 0.9118          
##             Specificity : 0.9986          
##          Pos Pred Value : 0.9568          
##          Neg Pred Value : 0.9969          
##              Prevalence : 0.0340          
##          Detection Rate : 0.0310          
##    Detection Prevalence : 0.0324          
##       Balanced Accuracy : 0.9552          
##                                           
##        'Positive' Class : Yes             
## 
RF
#Random Forest model

#Choosing tuning parameters:
#https://discuss.analyticsvidhya.com/t/how-to-decide-no-of-ntrees-in-randomforest/6882/3
#https://rpubs.com/phamdinhkhanh/389752

#10-fold CV control (re-created here; identical to the train_control defined
#in the KNN section above)
train_control <- caret::trainControl(method="cv", number=10, returnResamp='all', classProbs=TRUE, savePredictions='final')

#https://stackoverflow.com/questions/10085806/extracting-specific-columns-from-a-data-frame
#predictors only (tuneRF takes x and y separately)
df<- train %>%
  select(Red,Green,Blue)

#mtryStart defaults at sqrt(p)
#my available threshold for mtry values is pretty low based on the size of my dataset 
#NOTE(review): mtry = 5 exceeds the 3 available predictors — randomForest
#resets it to a valid value (see the warning in the output below)
(tuneRF(df,train$BlueClass,mtry = 5, ntree = 500, stepFactor=5, improve=0.05,
       trace=TRUE, plot=TRUE, doBest=TRUE))
## Warning in randomForest.default(x, y, mtry = mtryStart, ntree = ntreeTry, :
## invalid mtry: reset to within valid range

## mtry = 5  OOB error = 0.28% 
## Searching left ...
## mtry = 1     OOB error = 0.27% 
## 0.03571429 0.05 
## Searching right ...
## mtry = 3     OOB error = 0.28% 
## 0 0.05

## 
## Call:
##  randomForest(x = x, y = y, mtry = res[which.min(res[, 2]), 1]) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 1
## 
##         OOB estimate of  error rate: 0.27%
## Confusion matrix:
##       No Yes class.error
## No  9651   9 0.000931677
## Yes   18 322 0.052941176
#mtry = 3 chosen from the tuneRF search above

##------

#fixed tuning grid: mtry held at 3
tunegrid <- expand.grid(.mtry = 3)
modellist <- list()
#train with different ntree parameters and inspect bias/variance tradeoff 
#(exploratory runs kept for reference, commented out)
#findtrees1 <- train(BlueClass~Red+Green+Blue, 
 #              data=train,
  #             method = 'rf',
   #            metric = 'Accuracy',
    #           tuneGrid = tunegrid,
     #          trControl = control,
      #         ntree = 50)
#findtrees1

#findtrees2 <- train(BlueClass~Red+Green+Blue, 
 #              data=train,
  #             method = 'rf',
   #            metric = 'Accuracy',
    #           tuneGrid = tunegrid,
     #          trControl = control,
      #         ntree = 100)
#findtrees2

#final RF: mtry = 3, ntree = 500, 10-fold CV
system.time({
RF <- train(BlueClass~Red+Green+Blue, 
               data=train,
               method = 'rf',
               metric = 'Accuracy',
               tuneGrid = tunegrid,
               trControl = train_control,
               ntree = 500)
})
##    user  system elapsed 
##  11.778   1.213  13.040
RF
## Random Forest 
## 
## 10000 samples
##     3 predictor
##     2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 9000, 9000, 9000, 9000, 9000, 9000, ... 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.997     0.9544314
## 
## Tuning parameter 'mtry' was held constant at a value of 3
#set prediction, probability, and cv score variables in case needed
rfmod_pred <- predict(RF, test,'raw')
rfmod_prob <- predict(RF, test,'prob')
rfmod_scored <- cbind(test, rfmod_pred, rfmod_prob)
#AUC/ROC
#deprecated option: makes "Yes" (second factor level) the event class
options(yardstick.event_first=FALSE)

#area under the curve
rf_auc = rfmod_prob %>%
  yardstick::roc_auc(truth=test$BlueClass, Yes)
rf_auc
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.991
#ROC curve + plot
#renamed from ROC_curve4 so the GLM curve above is not clobbered
ROC_curve5<-rfmod_prob %>%
  yardstick::roc_curve(truth=test$BlueClass,estimate=Yes) %>%
  dplyr::mutate(one_minus_specificity = 1-specificity)

ROC_curve_plot5 <- ROC_curve5 %>%
  ggplot(aes(x=one_minus_specificity,y=sensitivity))+
  geom_line() + geom_point() +
  geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
  xlab("one_minus_specificity\n(false positive rate)")+
  #fixed copy-paste bug: this is the random forest curve, not LDA
  ggtitle('RF ROC curve')
ggplotly(ROC_curve_plot5)
#set new threshold on the cross-validated (held-out fold) predictions
rfmod_pred2 <- RF$pred %>% 
  #the accuracy doesn't improve by reducing the threshold any further than .50, 99.7% best.
  mutate(prediction = ifelse(Yes>.50, 'Yes', 'No')) %>%
  mutate(prediction = factor(prediction, levels=c('No','Yes')))

#new threshold matrix
confusionMatrix(rfmod_pred2$prediction, rfmod_pred2$obs, positive="Yes")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  9644   14
##        Yes   16  326
##                                          
##                Accuracy : 0.997          
##                  95% CI : (0.9957, 0.998)
##     No Information Rate : 0.966          
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.9545         
##                                          
##  Mcnemar's Test P-Value : 0.8551         
##                                          
##             Sensitivity : 0.9588         
##             Specificity : 0.9983         
##          Pos Pred Value : 0.9532         
##          Neg Pred Value : 0.9986         
##              Prevalence : 0.0340         
##          Detection Rate : 0.0326         
##    Detection Prevalence : 0.0342         
##       Balanced Accuracy : 0.9786         
##                                          
##        'Positive' Class : Yes            
## 
SVM
#Choosing tuning parameters:
#Linear kernel: grid search over cost
set.seed(1)
tune.out.linear=tune(svm,BlueClass~Red+Green+Blue,data=train,kernel="linear",ranges=list(cost=c(0.001, 0.01, 0.1, 1,5,10,100)))

#Radial kernel: grid search over cost and gamma
tune.out.radial=tune(svm, BlueClass~Red+Green+Blue,data=train, kernel="radial", ranges=list(cost=c(0.1,1,10,100,1000),gamma=c(0.5,1,2,3,4)))
#lowest error is radial cost 10 gamma 1

#Polynomial kernel: grid search over cost and degree
tune.out.poly=tune(svm,BlueClass~Red+Green+Blue,data=train, kernel="polynomial", ranges=list(cost=c(0.1,1,10,100,1000),degree=c(1,2,3,4,5)))

##------

#final SVM with radial kernel under 10-fold CV
#NOTE(review): cost/gamma here are e1071-style arguments passed through
#caret's ... — caret's svmRadial (kernlab) tunes sigma and C instead, so
#these values may not be honored; verify against svmmod$finalModel
system.time({
svmmod <- train(BlueClass~Red+Green+Blue, 
               data=train,
               method = 'svmRadial',
               metric = 'Accuracy',
               trControl = train_control,
               cost = 10,
               gamma = 1,
               preProcess = c("center","scale")
               )
})
##    user  system elapsed 
##  15.985   0.124  16.179
#set prediction, probability, and cv score variables in case needed
svmmod_pred <- predict(svmmod, test,'raw')
svmmod_prob <- predict(svmmod, test,'prob')
svmmod_scored <- cbind(test, svmmod_pred, svmmod_prob)
#AUC/ROC
#deprecated option: makes "Yes" (second factor level) the event class
options(yardstick.event_first=FALSE)

#area under the curve
svmmod_auc = svmmod_prob %>%
  yardstick::roc_auc(truth=test$BlueClass, Yes)
svmmod_auc
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 roc_auc binary         0.999
#ROC curve + plot
#renamed from ROC_curve4 so the earlier curves are not clobbered
ROC_curve6<-svmmod_prob %>%
  yardstick::roc_curve(truth=test$BlueClass,estimate=Yes) %>%
  dplyr::mutate(one_minus_specificity = 1-specificity)

ROC_curve_plot6 <- ROC_curve6 %>%
  ggplot(aes(x=one_minus_specificity,y=sensitivity))+
  geom_line() + geom_point() +
  geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
  xlab("one_minus_specificity\n(false positive rate)")+
  #fixed copy-paste bug: this is the SVM curve, not LDA
  ggtitle('SVM ROC curve')
ggplotly(ROC_curve_plot6)
#set new threshold on the cross-validated (held-out fold) predictions
svmmod_pred2 <- svmmod$pred %>% 
  #the accuracy doesn't improve by reducing the threshold any further than .52, 99.7% best.
  mutate(prediction = ifelse(Yes>.52, 'Yes', 'No')) %>%
  mutate(prediction = factor(prediction, levels=c('No','Yes')))

#new threshold matrix
confusionMatrix(svmmod_pred2$prediction, svmmod_pred2$obs, positive="Yes")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  9649   15
##        Yes   11  325
##                                           
##                Accuracy : 0.9974          
##                  95% CI : (0.9962, 0.9983)
##     No Information Rate : 0.966           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9602          
##                                           
##  Mcnemar's Test P-Value : 0.5563          
##                                           
##             Sensitivity : 0.9559          
##             Specificity : 0.9989          
##          Pos Pred Value : 0.9673          
##          Neg Pred Value : 0.9984          
##              Prevalence : 0.0340          
##          Detection Rate : 0.0325          
##    Detection Prevalence : 0.0336          
##       Balanced Accuracy : 0.9774          
##                                           
##        'Positive' Class : Yes             
## 
Tuning parameter selection process
  1. Interpretation of your chosen tuning parameter values:
  1. Explanation of how your chosen tuning parameter values were chosen:
Conclusions
  1. A discussion of the best performing algorithm(s) in the cross-validation and hold-out data
  1. A recommendation and rationale regarding which algorithm to use for detection of blue tarps
  1. Additional thoughts and questions: